# College shooting / box-score table, one row per player.
df <- read_csv(file = "./data/combinedstatshot.csv")
## Rows: 165 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): player, dunk_tot, dunk_pct, rim_tot, rim_pct, rim_asted, other2pt_...
## dbl (24): games, games_started, mp_per_g, fg_per_g, fga_per_g, fg_pct, fg2_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Drop games_started and pf_per_g, then list the remaining column names.
df <- select(df, !c("games_started", "pf_per_g"))
names(df)
##  [1] "player"         "dunk_tot"       "dunk_pct"       "rim_tot"       
##  [5] "rim_pct"        "rim_asted"      "other2pt_tot"   "other2pt_pct"  
##  [9] "other2pt_asted" "3pt_tot"        "3pt_pct"        "3pt_asted"     
## [13] "games"          "mp_per_g"       "fg_per_g"       "fga_per_g"     
## [17] "fg_pct"         "fg2_per_g"      "fg2a_per_g"     "fg2_pct"       
## [21] "fg3_per_g"      "fg3a_per_g"     "fg3_pct"        "ft_per_g"      
## [25] "fta_per_g"      "ft_pct"         "orb_per_g"      "drb_per_g"     
## [29] "trb_per_g"      "ast_per_g"      "stl_per_g"      "blk_per_g"     
## [33] "tov_per_g"      "pts_per_g"

Plot 2pt attempts to 3pt attempts

From success script

# Location of the draft / career statistics file, then load it.
path <- "~/BruinSports/data/draftdata.csv"
df_career_stats <- read_csv(file = path)
## Rows: 960 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): team_id, player, college_name, skip
## dbl (19): pick_overall, seasons, g, mp, pts, trb, ast, fg_pct, fg3_pct, ft_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Columns to remove from the career-stats table.
drop_cols <- c("team_id", "skip", "mp", "pts", "trb", "ast")

# all_of() marks drop_cols as an external character vector of column names.
# Passing the bare vector to select() is deprecated in tidyselect and would be
# ambiguous if the data ever had a column called `drop_cols`. all_of() also
# errors if any listed column is missing.
df_career_stats <- df_career_stats |> select(!all_of(drop_cols))
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(drop_cols)
## 
##   # Now:
##   data %>% select(all_of(drop_cols))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Keep only picks with an overall number below 15.
df_lot_picks <- filter(df_career_stats, pick_overall < 15)

names(df_career_stats)
##  [1] "pick_overall" "player"       "college_name" "seasons"      "g"           
##  [6] "fg_pct"       "fg3_pct"      "ft_pct"       "mp_per_g"     "pts_per_g"   
## [11] "trb_per_g"    "ast_per_g"    "ws"           "ws_per_48"    "bpm"         
## [16] "vorp"         "year"
# Add points + rebounds + assists (PRA) per game and turn the pick number
# into a factor so it can be used as a discrete grouping / axis variable.
df_lot_picks <- mutate(
  df_lot_picks,
  pra_per_g = pts_per_g + trb_per_g + ast_per_g,
  pick_overall = factor(pick_overall)
)

# Mean of each per-game stat within every draft position.
draft_means <- df_lot_picks |>
  group_by(pick_overall) |>
  summarise(
    avg_mpg = mean(mp_per_g),
    avg_ppg = mean(pts_per_g),
    avg_trbpg = mean(trb_per_g),
    avg_apg = mean(ast_per_g),
    avg_prapg = mean(pra_per_g)
  )

print(draft_means, n = 14)
## # A tibble: 14 × 6
##    pick_overall avg_mpg avg_ppg avg_trbpg avg_apg avg_prapg
##    <fct>          <dbl>   <dbl>     <dbl>   <dbl>     <dbl>
##  1 1               31.2   18.8       6.52    4.32      29.7
##  2 2               26.9   13.7       4.96    2.88      21.6
##  3 3               30.0   17.6       6.3     3.52      27.4
##  4 4               27.5   12.8       5.39    2.48      20.7
##  5 5               26.0   12.7       4.92    3.55      21.2
##  6 6               23.2   10.2       4.35    2.26      16.8
##  7 7               27.6   13.4       4.88    2.77      21.0
##  8 8               21.1    8.82      3.37    1.77      14.0
##  9 9               24.5   10.9       4.55    2.52      18.0
## 10 10              23.5   10.4       3.61    2.31      16.3
## 11 11              21.1   10.2       3.69    2.16      16.0
## 12 12              24.6   10.6       4.47    2.23      17.3
## 13 13              22.6   10.9       3.99    2.23      17.1
## 14 14              20.3    8.78      3.75    1.3       13.8
# Distribution of PRA per game at each draft position.
ggplot(df_lot_picks, aes(x = pick_overall, y = pra_per_g)) +
  geom_boxplot() +
  xlab("Draft Pick") +
  ylab("Points-Rebounds-Assists Per Game")

Plotting the players in the top 25% in PRA per game

# Attach each college player's lottery-pick record (matched on player name;
# clashing column names get _college / _nba suffixes), split the three
# "made-attempts" shot totals into separate columns, and convert the new
# columns from text to numbers.
df2 <- df |>
  left_join(df_lot_picks, by = "player", suffix = c("_college", "_nba")) |>
  separate_wider_delim(
    dunk_tot,
    delim = "-",
    names = c("dunk_made", "dunk_attempts")
  ) |>
  separate_wider_delim(
    rim_tot,
    delim = "-",
    names = c("rim_made", "rim_attempts")
  ) |>
  separate_wider_delim(
    other2pt_tot,
    delim = "-",
    names = c("other2pt_made", "other2pt_attempts")
  ) |>
  mutate(
    across(
      c(
        dunk_made, dunk_attempts,
        rim_made, rim_attempts,
        other2pt_made, other2pt_attempts
      ),
      as.numeric
    )
  )

Defining the bust metric

# VORP divided by the number of games (g).
df2 <- mutate(df2, vorp_per_g = vorp / g)

# 70th percentile of each stat within every draft position.
df_top_players <- df2 |>
  group_by(pick_overall) |>
  summarize(
    across(
      c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g),
      \(x) quantile(x, probs = 0.7)
    )
  )

# 30th percentile of the same stats within every draft position.
df_bottom_players <- df2 |>
  group_by(pick_overall) |>
  summarize(
    across(
      c(pts_per_g_nba, trb_per_g_nba, ast_per_g_nba, pra_per_g, vorp_per_g),
      \(x) quantile(x, probs = 0.3)
    )
  )

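# The metric favors big men, so the rebound thresholds use higher percentiles
# than the other stats: 0.8 instead of 0.7 in is_not_bust(), and 0.4 instead
# of 0.3 in is_bust().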

#' Players at one draft slot who are classified as "not a bust"
#'
#' A player drafted at `pick_number` is returned when all of these hold:
#' * at least one of PRA, assists, rebounds or points per game is at or above
#'   the slot's threshold (rebounds use the slot's 80th percentile; the other
#'   three come from `df_top_players`);
#' * VORP per game is at or above the slot's median;
#' * `seasons` is at least 4/5 of the years elapsed since `year`.
#'
#' @param pick_number A single draft slot, compared against `pick_overall`.
#' @param df_top_players Per-pick thresholds with columns `pick_overall`,
#'   `pts_per_g_nba`, `ast_per_g_nba` and `pra_per_g`. If `NULL`, the 70th
#'   percentile of each stat at this slot is computed from `df2`.
#' @param df2 Player table with columns `pick_overall`, `pts_per_g_nba`,
#'   `trb_per_g_nba`, `ast_per_g_nba`, `pra_per_g`, `vorp_per_g`, `seasons`
#'   and `year`. Required.
#' @param current_year Year used to measure time since the draft.
#' @return The rows of `df2` for this slot that satisfy the criteria above.
is_not_bust <- function(pick_number, df_top_players = NULL, df2 = NULL,
                        current_year = 2023) {
  # The defaults are NULL rather than `df2 = df2`: a default that names itself
  # can never be evaluated ("promise already under evaluation").
  stopifnot(length(pick_number) == 1)
  if (is.null(df2)) {
    stop("`df2` must be supplied.", call. = FALSE)
  }

  # Every threshold below is relative to players taken at this slot only.
  slot_players <- df2 |> filter(pick_overall == pick_number)

  if (is.null(df_top_players)) {
    slot_q70 <- function(column) {
      quantile(slot_players[[column]], probs = 0.7, names = FALSE)
    }
    pts_cutoff <- slot_q70("pts_per_g_nba")
    ast_cutoff <- slot_q70("ast_per_g_nba")
    pra_cutoff <- slot_q70("pra_per_g")
  } else {
    # Look the slot up by its pick value, not by row position, so a summary
    # table that lacks a slot cannot silently return another pick's numbers.
    slot_row <- match(
      as.character(pick_number),
      as.character(df_top_players[["pick_overall"]])
    )
    if (is.na(slot_row)) {
      stop(
        "`df_top_players` has no row for pick ", pick_number, ".",
        call. = FALSE
      )
    }
    pts_cutoff <- df_top_players[["pts_per_g_nba"]][[slot_row]]
    ast_cutoff <- df_top_players[["ast_per_g_nba"]][[slot_row]]
    pra_cutoff <- df_top_players[["pra_per_g"]][[slot_row]]
  }

  # Rebounds use a higher percentile than the other stats.
  trb_cutoff <- quantile(
    slot_players[["trb_per_g_nba"]],
    probs = 0.8,
    names = FALSE
  )
  vorp_cutoff <- median(slot_players[["vorp_per_g"]])

  slot_players |>
    filter(
      pra_per_g >= pra_cutoff |
        ast_per_g_nba >= ast_cutoff |
        trb_per_g_nba >= trb_cutoff |
        pts_per_g_nba >= pts_cutoff
    ) |>
    filter(vorp_per_g >= vorp_cutoff) |>
    # must also have played at least most of their career in the nba
    filter(seasons >= 4 / 5 * (current_year - year))
}

#' Players at one draft slot who are classified as a bust
#'
#' A player drafted at `pick_number` is returned when either of these holds:
#' * PRA, assists, rebounds, points and VORP per game are all below the
#'   slot's thresholds (rebounds use the slot's 40th percentile, VORP the
#'   30th; the other three come from `df_bottom_players`);
#' * `seasons` is less than half of the years elapsed since `year`.
#'
#' @param pick_number A single draft slot, compared against `pick_overall`.
#' @param df_bottom_players Per-pick thresholds with columns `pick_overall`,
#'   `pts_per_g_nba`, `ast_per_g_nba` and `pra_per_g`. If `NULL`, the 30th
#'   percentile of each stat at this slot is computed from `df2`.
#' @param df2 Player table with columns `pick_overall`, `pts_per_g_nba`,
#'   `trb_per_g_nba`, `ast_per_g_nba`, `pra_per_g`, `vorp_per_g`, `seasons`
#'   and `year`. Required.
#' @param current_year Year used to measure time since the draft.
#' @return The rows of `df2` for this slot that satisfy the criteria above.
is_bust <- function(pick_number, df_bottom_players = NULL, df2 = NULL,
                    current_year = 2023) {
  # The defaults are NULL rather than `df2 = df2`: a default that names itself
  # can never be evaluated ("promise already under evaluation").
  stopifnot(length(pick_number) == 1)
  if (is.null(df2)) {
    stop("`df2` must be supplied.", call. = FALSE)
  }

  # Every threshold below is relative to players taken at this slot only.
  slot_players <- df2 |> filter(pick_overall == pick_number)

  if (is.null(df_bottom_players)) {
    slot_q30 <- function(column) {
      quantile(slot_players[[column]], probs = 0.3, names = FALSE)
    }
    pts_cutoff <- slot_q30("pts_per_g_nba")
    ast_cutoff <- slot_q30("ast_per_g_nba")
    pra_cutoff <- slot_q30("pra_per_g")
  } else {
    # Look the slot up by its pick value, not by row position, so a summary
    # table that lacks a slot cannot silently return another pick's numbers.
    slot_row <- match(
      as.character(pick_number),
      as.character(df_bottom_players[["pick_overall"]])
    )
    if (is.na(slot_row)) {
      stop(
        "`df_bottom_players` has no row for pick ", pick_number, ".",
        call. = FALSE
      )
    }
    pts_cutoff <- df_bottom_players[["pts_per_g_nba"]][[slot_row]]
    ast_cutoff <- df_bottom_players[["ast_per_g_nba"]][[slot_row]]
    pra_cutoff <- df_bottom_players[["pra_per_g"]][[slot_row]]
  }

  # Rebounds use a higher percentile than the other stats.
  trb_cutoff <- quantile(
    slot_players[["trb_per_g_nba"]],
    probs = 0.4,
    names = FALSE
  )
  vorp_cutoff <- quantile(
    slot_players[["vorp_per_g"]],
    probs = 0.3,
    names = FALSE
  )

  # playing less than half the seasons since drafted makes you a bust
  slot_players |>
    filter(
      (pra_per_g < pra_cutoff &
        ast_per_g_nba < ast_cutoff &
        trb_per_g_nba < trb_cutoff &
        pts_per_g_nba < pts_cutoff &
        vorp_per_g < vorp_cutoff) |
        seasons < 1 / 2 * (current_year - year)
    )
}
# Pick 1: players who meet the not-bust criteria
df_pick_1 <- is_not_bust(
  pick_number = 1,
  df_top_players = df_top_players,
  df2 = df2
)

# Pick 1: players flagged as busts
df_pick_1_bust <- is_bust(
  pick_number = 1,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_1
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 John Wall              33            36 91.7%         116          182 63.7%  
## 2 Kyrie Irving            0             0 0%             26           39 66.7%  
## 3 Anthony Davis          96            98 98.0%         152          174 87.4%  
## 4 Karl-Anthony T…        22            24 91.7%          87          121 71.9%  
## 5 Ben Simmons            56            61 91.8%         159          220 72.3%  
## 6 Zion Williamson        72            79 91.1%         247          313 78.9%  
## 7 Anthony Edwards        27            27 100.0%         89          129 69.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_1_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Anthony Bennett        53            58 91.4%         100          140 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 2: not-bust players, then busts
df_pick_2 <- is_not_bust(
  pick_number = 2,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_2_bust <- is_bust(
  pick_number = 2,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_2
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 D'Angelo Russe…         4             4 100.0%         70          110 63.6%  
## 2 Brandon Ingram         17            17 100.0%         69          117 59.0%  
## 3 Lonzo Ball             37            40 92.5%          94          120 78.3%  
## 4 Ja Morant              28            31 90.3%         160          264 60.6%  
## 5 Chet Holmgren          57            57 100.0%        105          125 84.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_2_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Derrick Willia…        56            60 93.3%         135          188 71.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 3: not-bust players, then busts
df_pick_3 <- is_not_bust(
  pick_number = 3,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_3_bust <- is_bust(
  pick_number = 3,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_3
## # A tibble: 4 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Bradley Beal        18            20 90.0%          89          137 65.0%  
## 2 Joel Embiid         30            30 100.0%         80           99 80.8%  
## 3 Jayson Tatum        18            21 85.7%          79          126 62.7%  
## 4 Evan Mobley         63            66 95.5%         113          144 78.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_3_bust
## # A tibble: 1 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Jahlil Okafor        64            67 95.5%         213          270 78.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 4: not-bust players, then busts
df_pick_4 <- is_not_bust(
  pick_number = 4,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_4_bust <- is_bust(
  pick_number = 4,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_4
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Aaron Gordon           54            56 96.4%         137          198 69.2%  
## 2 Jaren Jackson …        31            31 100.0%         61           93 65.6%  
## 3 Scottie Barnes         19            21 90.5%          61           89 68.5%  
## 4 Keegan Murray          63            67 94.0%         196          277 70.8%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_4_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# Pick 5: not-bust players, then busts
df_pick_5 <- is_not_bust(
  pick_number = 5,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_5_bust <- is_bust(
  pick_number = 5,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_5
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 DeMarcus Cousi…        53            57 93.0%         144          189 76.2%  
## 2 De'Aaron Fox           20            21 95.2%         131          203 64.5%  
## 3 Trae Young              0             0 0%            105          201 52.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_5_bust
## # A tibble: 1 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Thomas Robinson        70            83 84.3%         169          262 64.5%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 6: not-bust players, then busts
df_pick_6 <- is_not_bust(
  pick_number = 6,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_6_bust <- is_bust(
  pick_number = 6,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_6
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Damian Lillard        13            17 76.5%          98          169 58.0%  
## 2 Nerlens Noel          48            50 96.0%          76           99 76.8%  
## 3 Marcus Smart          16            18 88.9%          78          110 70.9%  
## 4 Buddy Hield           18            22 81.8%         119          178 66.9%  
## 5 Onyeka Okongwu        58            61 95.1%         135          186 72.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_6_bust
## # A tibble: 1 × 55
##   player    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>         <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ekpe Udoh        30            32 93.8%          78          109 71.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 7: not-bust players, then busts
df_pick_7 <- is_not_bust(
  pick_number = 7,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_7_bust <- is_bust(
  pick_number = 7,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_7
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Julius Randle          37            40 92.5%         132          197 67.0%  
## 2 Jamal Murray           18            19 94.7%          77          111 69.4%  
## 3 Lauri Markkanen        20            24 83.3%          65          100 65.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_7_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ben McLemore        44            45 97.8%          90          126 71.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 8: not-bust players, then busts
df_pick_8 <- is_not_bust(
  pick_number = 8,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_8_bust <- is_bust(
  pick_number = 8,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_8
## # A tibble: 3 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Al-Farouq Aminu        46            48 95.8%         112          173 64.7%  
## 2 Kentavious Cal…        15            16 93.8%          63           94 67.0%  
## 3 Franz Wagner           11            11 100.0%         63           93 67.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_8_bust
## # A tibble: 0 × 55
## # ℹ 55 variables: player <chr>, dunk_made <dbl>, dunk_attempts <dbl>,
## #   dunk_pct <chr>, rim_made <dbl>, rim_attempts <dbl>, rim_pct <chr>,
## #   rim_asted <chr>, other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <chr>, other2pt_asted <chr>, 3pt_tot <chr>, 3pt_pct <chr>,
## #   3pt_asted <chr>, games <dbl>, mp_per_g_college <dbl>, fg_per_g <dbl>,
## #   fga_per_g <dbl>, fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, …
# Pick 9: not-bust players, then busts
df_pick_9 <- is_not_bust(
  pick_number = 9,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_9_bust <- is_bust(
  pick_number = 9,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_9
## # A tibble: 5 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Gordon Hayward        19            20 95.0%          89          128 69.5%  
## 2 Kemba Walker           3             3 100.0%        115          196 58.7%  
## 3 Andre Drummond        80            89 89.9%         130          185 70.3%  
## 4 Trey Burke             9             9 100.0%         67          105 63.8%  
## 5 Jakob Poeltl          32            34 94.1%         199          284 70.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_9_bust
## # A tibble: 1 × 55
##   player     dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>          <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kevin Knox        18            20 90.0%          65           99 65.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 10: not-bust players, then busts
df_pick_10 <- is_not_bust(
  pick_number = 10,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_10_bust <- is_bust(
  pick_number = 10,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_10
## # A tibble: 5 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Paul George          18            22 81.8%          70          106 66.0%  
## 2 CJ McCollum           3             3 100.0%         34           63 54.0%  
## 3 Elfrid Payton        21            24 87.5%         169          247 68.4%  
## 4 Mikal Bridges        35            42 83.3%         109          161 67.7%  
## 5 Jalen Smith          49            52 94.2%         114          158 72.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_10_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ziaire Williams        10            11 90.9%          26           49 53.1%  
## 2 Johnny Davis           16            19 84.2%          89          143 62.2%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 11: not-bust players, then busts
df_pick_11 <- is_not_bust(
  pick_number = 11,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_11_bust <- is_bust(
  pick_number = 11,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_11
## # A tibble: 4 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Klay Thompson           8             8 100.0%         66          110 60.0%  
## 2 Myles Turner           11            13 84.6%          40           54 74.1%  
## 3 Domantas Sabon…        22            24 91.7%         157          214 73.4%  
## 4 Shai Gilgeous-…        11            11 100.0%        108          182 59.3%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_11_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 James Bouknight        12            12 100.0%         52           79 65.8%  
## 2 Jett Howard             6             6 100.0%         29           47 61.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 12 (twelfth overall): not-bust players, then busts
df_pick_12 <- is_not_bust(
  pick_number = 12,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_12_bust <- is_bust(
  pick_number = 12,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_12
## # A tibble: 5 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Steven Adams           29            33 87.9%          85          129 65.9%  
## 2 Miles Bridges          30            35 85.7%          84          128 65.6%  
## 3 Tyrese Halibur…         7             8 87.5%          46           62 74.2%  
## 4 Jalen Williams         25            27 92.6%         124          186 66.7%  
## 5 Dereck Lively …        54            55 98.2%          74           96 77.1%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
df_pick_12_bust
## # A tibble: 1 × 55
##   player       dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>            <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Xavier Henry        17            17 100.0%         60           90 66.7%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Pick 13: not-bust players, then busts
df_pick_13 <- is_not_bust(
  pick_number = 13,
  df_top_players = df_top_players,
  df2 = df2
)
df_pick_13_bust <- is_bust(
  pick_number = 13,
  df_bottom_players = df_bottom_players,
  df2 = df2
)

df_pick_13
## # A tibble: 7 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Ed Davis               26            27 96.3%          42           50 84.0%  
## 2 Kelly Olynyk           25            28 89.3%         152          212 71.7%  
## 3 Zach LaVine            21            25 84.0%          51           90 56.7%  
## 4 Devin Booker            8             9 88.9%          42           59 71.2%  
## 5 Donovan Mitche…         9            13 69.2%          64          116 55.2%  
## 6 Tyler Herro             4             5 80.0%          56           84 66.7%  
## 7 Jalen Duren            70            76 92.1%         111          152 73.0%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Auto-print the is_bust() result for pick 13.
df_pick_13_bust
## # A tibble: 2 × 55
##   player          dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>               <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Kendall Marsha…         0             0 0%             35           53 66.0%  
## 2 Jerome Robinson        12            13 92.3%          98          157 62.4%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# fourteenth overall
# Build the two pick-14 tables with the is_not_bust() / is_bust() helpers
# (defined outside this section). Each call receives the pick number, the
# df_top_players or df_bottom_players table, and df2.
# NOTE(review): presumably each helper keeps the df2 rows for players at this
# pick who appear in the supplied table -- confirm against the helper bodies.
df_pick_14 <- is_not_bust(14, df_top_players, df2)
df_pick_14_bust <- is_bust(14, df_bottom_players, df2)

# Auto-print the is_not_bust() result for inspection.
df_pick_14
## # A tibble: 4 × 55
##   player        dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>             <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Marcus Morris        31            33 93.9%         114          147 77.6%  
## 2 T.J. Warren          37            37 100.0%        192          251 76.5%  
## 3 Cameron Payne         3             3 100.0%         53           87 60.9%  
## 4 Bam Adebayo          99           105 94.3%         138          185 74.6%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Auto-print the is_bust() result for pick 14.
df_pick_14_bust
## # A tibble: 1 × 55
##   player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##   <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
## 1 Romeo Langford        10            14 71.4%          91          138 65.9%  
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ft_pct_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, …
# Combine the per-pick tables (picks 1 through 14) into a single table per
# group. bind_rows() stacks its inputs in the order they are listed.

df_good <- bind_rows(
  df_pick_1, df_pick_2, df_pick_3, df_pick_4, df_pick_5,
  df_pick_6, df_pick_7, df_pick_8, df_pick_9, df_pick_10,
  df_pick_11, df_pick_12, df_pick_13, df_pick_14
)

df_busts <- bind_rows(
  df_pick_1_bust, df_pick_2_bust, df_pick_3_bust, df_pick_4_bust,
  df_pick_5_bust, df_pick_6_bust, df_pick_7_bust, df_pick_8_bust,
  df_pick_9_bust, df_pick_10_bust, df_pick_11_bust, df_pick_12_bust,
  df_pick_13_bust, df_pick_14_bust
)

# Show the first 20 rows of the combined non-bust table.
print(df_good, n = 20)
## # A tibble: 64 × 55
##    player         dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <chr>              <dbl>         <dbl> <chr>       <dbl>        <dbl> <chr>  
##  1 John Wall             33            36 91.7%         116          182 63.7%  
##  2 Kyrie Irving           0             0 0%             26           39 66.7%  
##  3 Anthony Davis         96            98 98.0%         152          174 87.4%  
##  4 Karl-Anthony …        22            24 91.7%          87          121 71.9%  
##  5 Ben Simmons           56            61 91.8%         159          220 72.3%  
##  6 Zion Williams…        72            79 91.1%         247          313 78.9%  
##  7 Anthony Edwar…        27            27 100.0%         89          129 69.0%  
##  8 D'Angelo Russ…         4             4 100.0%         70          110 63.6%  
##  9 Brandon Ingram        17            17 100.0%         69          117 59.0%  
## 10 Lonzo Ball            37            40 92.5%          94          120 78.3%  
## 11 Ja Morant             28            31 90.3%         160          264 60.6%  
## 12 Chet Holmgren         57            57 100.0%        105          125 84.0%  
## 13 Bradley Beal          18            20 90.0%          89          137 65.0%  
## 14 Joel Embiid           30            30 100.0%         80           99 80.8%  
## 15 Jayson Tatum          18            21 85.7%          79          126 62.7%  
## 16 Evan Mobley           63            66 95.5%         113          144 78.5%  
## 17 Aaron Gordon          54            56 96.4%         137          198 69.2%  
## 18 Jaren Jackson…        31            31 100.0%         61           93 65.6%  
## 19 Scottie Barnes        19            21 90.5%          61           89 68.5%  
## 20 Keegan Murray         63            67 94.0%         196          277 70.8%  
## # ℹ 44 more rows
## # ℹ 48 more variables: rim_asted <chr>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <chr>, other2pt_asted <chr>,
## #   `3pt_tot` <chr>, `3pt_pct` <chr>, `3pt_asted` <chr>, games <dbl>,
## #   mp_per_g_college <dbl>, fg_per_g <dbl>, fga_per_g <dbl>,
## #   fg_pct_college <dbl>, fg2_per_g <dbl>, fg2a_per_g <dbl>, fg2_pct <dbl>,
## #   fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_pct_college <dbl>, ft_per_g <dbl>, …
# List the player names in the combined non-bust table.
pull(df_good, player)
##  [1] "John Wall"                "Kyrie Irving"            
##  [3] "Anthony Davis"            "Karl-Anthony Towns"      
##  [5] "Ben Simmons"              "Zion Williamson"         
##  [7] "Anthony Edwards"          "D'Angelo Russell"        
##  [9] "Brandon Ingram"           "Lonzo Ball"              
## [11] "Ja Morant"                "Chet Holmgren"           
## [13] "Bradley Beal"             "Joel Embiid"             
## [15] "Jayson Tatum"             "Evan Mobley"             
## [17] "Aaron Gordon"             "Jaren Jackson Jr."       
## [19] "Scottie Barnes"           "Keegan Murray"           
## [21] "DeMarcus Cousins"         "De'Aaron Fox"            
## [23] "Trae Young"               "Damian Lillard"          
## [25] "Nerlens Noel"             "Marcus Smart"            
## [27] "Buddy Hield"              "Onyeka Okongwu"          
## [29] "Julius Randle"            "Jamal Murray"            
## [31] "Lauri Markkanen"          "Al-Farouq Aminu"         
## [33] "Kentavious Caldwell-Pope" "Franz Wagner"            
## [35] "Gordon Hayward"           "Kemba Walker"            
## [37] "Andre Drummond"           "Trey Burke"              
## [39] "Jakob Poeltl"             "Paul George"             
## [41] "CJ McCollum"              "Elfrid Payton"           
## [43] "Mikal Bridges"            "Jalen Smith"             
## [45] "Klay Thompson"            "Myles Turner"            
## [47] "Domantas Sabonis"         "Shai Gilgeous-Alexander" 
## [49] "Steven Adams"             "Miles Bridges"           
## [51] "Tyrese Haliburton"        "Jalen Williams"          
## [53] "Dereck Lively II"         "Ed Davis"                
## [55] "Kelly Olynyk"             "Zach LaVine"             
## [57] "Devin Booker"             "Donovan Mitchell"        
## [59] "Tyler Herro"              "Jalen Duren"             
## [61] "Marcus Morris"            "T.J. Warren"             
## [63] "Cameron Payne"            "Bam Adebayo"
# Keep the non-bust player names as a character vector for later use.
good_list <- pull(df_good, player)


# List the player names in the combined bust table.
pull(df_busts, player)
##  [1] "Anthony Bennett"  "Derrick Williams" "Jahlil Okafor"    "Thomas Robinson" 
##  [5] "Ekpe Udoh"        "Ben McLemore"     "Kevin Knox"       "Ziaire Williams" 
##  [9] "Johnny Davis"     "James Bouknight"  "Jett Howard"      "Xavier Henry"    
## [13] "Kendall Marshall" "Jerome Robinson"  "Romeo Langford"
# Keep the bust player names as a character vector for later use.
bust_list <- pull(df_busts, player)
# Scatter of college 2PT vs 3PT makes per game for the non-bust picks, with a
# repelled name label on every point.
plot_good <- ggplot(
  df_good,
  aes(x = fg2_per_g, y = fg3_per_g, label = player)
) +
  geom_point(color = "green", size = 4, alpha = 0.5) +
  geom_label_repel(size = 1.5, max.overlaps = 20) +
  labs(
    title = "CBB Shot Selection for Good Value NBA Lottery Picks",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  theme_bw()

# Same axes for the bust picks, drawn in red with larger labels.
plot_busts <- ggplot(df_busts, aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(color = "red", size = 4, alpha = 0.5) +
  geom_label_repel(aes(label = player), size = 3) +
  labs(
    title = "CBB Shot Selection for NBA Lottery Busts",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game"
  ) +
  theme_bw()


# All rows of df2 in grey, with the bust and non-bust groups drawn on top.
# The constant strings inside aes() create the legend entries that
# scale_color_manual() maps to colours.
plot_combined <- ggplot(df2, aes(x = fg2_per_g, y = fg3_per_g)) +
  geom_point(aes(color = "Average value"), size = 4, alpha = 0.2) +
  geom_point(
    data = df_busts,
    aes(color = "Bad value"),
    size = 4,
    alpha = 0.5
  ) +
  geom_point(
    data = df_good,
    aes(color = "Good value"),
    size = 4,
    alpha = 0.5
  ) +
  labs(
    title = "CBB Shot Selection for NBA Lottery Picks",
    x = "2PT Field Goal Makes per game",
    y = "3PT Field Goal Makes per game",
    color = "Value"
  ) +
  scale_color_manual(
    values = c(
      "Bad value" = "red",
      "Good value" = "green",
      "Average value" = "grey"
    )
  ) +
  theme_bw()

plot_combined

plot_busts

plot_good

Principal Component Analysis of college stats, for grouping/covariance

library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Preparing Data

# List the columns available in df2 before selecting the PCA inputs.
names(df2)
##  [1] "player"            "dunk_made"         "dunk_attempts"    
##  [4] "dunk_pct"          "rim_made"          "rim_attempts"     
##  [7] "rim_pct"           "rim_asted"         "other2pt_made"    
## [10] "other2pt_attempts" "other2pt_pct"      "other2pt_asted"   
## [13] "3pt_tot"           "3pt_pct"           "3pt_asted"        
## [16] "games"             "mp_per_g_college"  "fg_per_g"         
## [19] "fga_per_g"         "fg_pct_college"    "fg2_per_g"        
## [22] "fg2a_per_g"        "fg2_pct"           "fg3_per_g"        
## [25] "fg3a_per_g"        "fg3_pct_college"   "ft_per_g"         
## [28] "fta_per_g"         "ft_pct_college"    "orb_per_g"        
## [31] "drb_per_g"         "trb_per_g_college" "ast_per_g_college"
## [34] "stl_per_g"         "blk_per_g"         "tov_per_g"        
## [37] "pts_per_g_college" "pick_overall"      "college_name"     
## [40] "seasons"           "g"                 "fg_pct_nba"       
## [43] "fg3_pct_nba"       "ft_pct_nba"        "mp_per_g_nba"     
## [46] "pts_per_g_nba"     "trb_per_g_nba"     "ast_per_g_nba"    
## [49] "ws"                "ws_per_48"         "bpm"              
## [52] "vorp"              "year"              "pra_per_g"        
## [55] "vorp_per_g"
# Move the player names out of the data and into the row names.
df3 <- df2 |> column_to_rownames(var = "player")

# Select the college statistics for the PCA (renaming `3pt_asted` to
# fg3_asted on the way), turn the percentage strings such as "91.7%" into
# 0-1 proportions, and add a 3PT make rate placed right after fg3_asted.
df_cbb <- df3 |>
  select(
    dunk_made, dunk_attempts, dunk_pct,
    rim_made, rim_attempts, rim_pct, rim_asted,
    other2pt_made, other2pt_attempts, other2pt_pct, other2pt_asted,
    fg2_pct, fg3_per_g, fg3a_per_g,
    fg3_asted = `3pt_asted`,
    games, ft_per_g, fta_per_g, ast_per_g_college, orb_per_g, drb_per_g,
    stl_per_g, blk_per_g, tov_per_g, pts_per_g_college
  ) |>
  mutate(
    across(
      c(dunk_pct, rim_pct, rim_asted, other2pt_pct, other2pt_asted, fg3_asted),
      \(pct_text) parse_number(pct_text) / 100
    ),
    # Makes divided by attempts; coalesce() swaps a missing ratio for 0
    # (presumably the no-attempts case -- confirm against the data).
    fg3_pct_per_g = coalesce(fg3_per_g / fg3a_per_g, 0)
  ) |>
  relocate(fg3_pct_per_g, .after = fg3_asted)

#' Convert totals to per-game values
#'
#' @param x Numeric vector of totals.
#' @param games Numeric vector (or scalar) of games played; recycled by R's
#'   usual arithmetic rules.
#' @return `x` divided element-wise by `games`.
to_per_game <- function(x, games) {
  x / games
}

# Put the shot-count columns on a per-game basis by dividing each by `games`.
# The six columns ending in "_made" / "_attempts" are exactly the dunk, rim
# and other-2pt count columns selected into df_cbb.
df_cbb <- df_cbb |>
  mutate(
    across(
      ends_with(c("_made", "_attempts")),
      \(total) to_per_game(total, games)
    )
  )

# Count missing values per column.
colSums(is.na(df_cbb))
##         dunk_made     dunk_attempts          dunk_pct          rim_made 
##                 0                 0                 0                 0 
##      rim_attempts           rim_pct         rim_asted     other2pt_made 
##                 0                 0                 0                 0 
## other2pt_attempts      other2pt_pct    other2pt_asted           fg2_pct 
##                 0                 0                 0                 0 
##         fg3_per_g        fg3a_per_g         fg3_asted     fg3_pct_per_g 
##                 0                 0                 0                 0 
##             games          ft_per_g         fta_per_g ast_per_g_college 
##                 0                 0                 0                 0 
##         orb_per_g         drb_per_g         stl_per_g         blk_per_g 
##                 0                 0                 0                 0 
##         tov_per_g pts_per_g_college 
##                 0                 0

Following this guide: https://www.datacamp.com/tutorial/pca-analysis-r

# Centre and scale every statistic except `games`, then convert the matrix
# returned by scale() back to a tibble.
df_cbb_scaled <- df_cbb |>
  select(-games) |>
  scale() |>
  as_tibble()
df_cbb_scaled
## # A tibble: 165 × 25
##    dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct rim_asted
##        <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>     <dbl>
##  1     0.338        0.359    0.125    0.886        1.15    -0.582   -0.936 
##  2    -0.789       -0.813    0.332   -0.925       -1.11     1.48    -1.47  
##  3     1.25         1.23     0.325    1.46         1.11     1.46     0.662 
##  4     0.599        0.655    0.0208   0.478        0.382    0.591    0.733 
##  5     1.13         1.15     0.215    1.44         1.18     1.16     0.633 
##  6     0.246        0.233    0.270    0.0605      -0.0262   0.521   -0.0688
##  7    -0.560       -0.589    0.387    0.233        0.367   -0.443    0.0432
##  8     0.102        0.0619   0.408   -0.246       -0.174   -0.443    0.0668
##  9    -0.635       -0.661    0.353   -0.687       -0.754    0.228    0.615 
## 10    -0.619       -0.573   -0.560   -0.839       -0.861   -0.261   -0.623 
## # ℹ 155 more rows
## # ℹ 18 more variables: other2pt_made <dbl>, other2pt_attempts <dbl>,
## #   other2pt_pct <dbl>, other2pt_asted <dbl>, fg2_pct <dbl>, fg3_per_g <dbl>,
## #   fg3a_per_g <dbl>, fg3_asted <dbl>, fg3_pct_per_g <dbl>, ft_per_g <dbl>,
## #   fta_per_g <dbl>, ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>,
## #   stl_per_g <dbl>, blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# Disabled: would re-attach a player column to the scaled tibble.
# df_cbb_scaled$player <- df_cbb$player
# df_cbb_scaled <- df_cbb_scaled |> relocate(player, .before = dunk_made)

# Pairwise correlations between the scaled college statistics.
corr_matrix <- df_cbb_scaled |> cor()
colnames(corr_matrix)
##  [1] "dunk_made"         "dunk_attempts"     "dunk_pct"         
##  [4] "rim_made"          "rim_attempts"      "rim_pct"          
##  [7] "rim_asted"         "other2pt_made"     "other2pt_attempts"
## [10] "other2pt_pct"      "other2pt_asted"    "fg2_pct"          
## [13] "fg3_per_g"         "fg3a_per_g"        "fg3_asted"        
## [16] "fg3_pct_per_g"     "ft_per_g"          "fta_per_g"        
## [19] "ast_per_g_college" "orb_per_g"         "drb_per_g"        
## [22] "stl_per_g"         "blk_per_g"         "tov_per_g"        
## [25] "pts_per_g_college"
# Full correlation heat map.
ggcorrplot(corr_matrix, method = "square")

# Lower triangle only, variables reordered by hierarchical clustering, with
# smaller axis labels and a title.
ggcorrplot(
  corr_matrix,
  method = "square",
  hc.order = TRUE,
  type = "lower",
  tl.cex = 7,
  title = "Correlations between different college statistics"
)

Guides: http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials/

Kmeans Clustering: https://medium.com/@zullinira23/implementation-of-principal-component-analysis-pca-on-k-means-clustering-in-r-794f03ec15f

# Shuffle the rows: sampling 100% of them without replacement keeps every
# player exactly once, in random order.
# NOTE(review): no seed is set in this chunk, so the order changes between
# runs unless one is set earlier in the file.
df_cbb.sample <- sample_frac(df_cbb, size = 1, replace = FALSE)
head(df_cbb.sample)
##                     dunk_made dunk_attempts dunk_pct rim_made rim_attempts
## Lonzo Ball         1.02777778    1.11111111    0.925 2.611111     3.333333
## Meyers Leonard     0.78461538    0.86153846    0.911 1.676923     2.215385
## Wendell Carter Jr. 1.51351351    1.56756757    0.966 3.243243     4.729730
## Derrick Williams   0.81159420    0.86956522    0.933 1.956522     2.724638
## Davion Mitchell    0.03333333    0.03333333    1.000 1.166667     1.850000
## Moses Moody        0.31250000    0.34375000    0.909 1.937500     3.437500
##                    rim_pct rim_asted other2pt_made other2pt_attempts
## Lonzo Ball           0.783     0.511     0.4166667         0.8055556
## Meyers Leonard       0.757     0.798     0.9230769         2.0923077
## Wendell Carter Jr.   0.686     0.608     1.0810811         2.6216216
## Derrick Williams     0.718     0.585     0.7101449         1.7101449
## Davion Mitchell      0.631     0.186     0.4166667         0.9500000
## Moses Moody          0.564     0.371     1.4062500         3.5625000
##                    other2pt_pct other2pt_asted fg2_pct fg3_per_g fg3a_per_g
## Lonzo Ball                0.517          0.200   0.732       2.2        5.4
## Meyers Leonard            0.441          0.500   0.585       0.0        0.2
## Wendell Carter Jr.        0.412          0.500   0.586       0.5        1.2
## Derrick Williams          0.415          0.388   0.598       0.7        1.3
## Davion Mitchell           0.439          0.080   0.519       1.6        4.1
## Moses Moody               0.395          0.311   0.478       1.8        5.1
##                    fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
## Lonzo Ball             0.738     0.4074074    36      1.8       2.7
## Meyers Leonard         1.000     0.0000000    65      1.6       2.2
## Wendell Carter Jr.     0.947     0.4166667    37      3.4       4.5
## Derrick Williams       0.929     0.5384615    69      5.9       8.2
## Davion Mitchell        0.603     0.3902439    60      1.6       2.4
## Moses Moody            0.897     0.3529412    32      4.7       5.8
##                    ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g
## Lonzo Ball                       7.6       0.9       5.1       1.8       0.8
## Meyers Leonard                   0.7       1.3       3.4       0.3       1.1
## Wendell Carter Jr.               2.0       2.9       6.1       0.8       2.1
## Derrick Williams                 0.9       2.5       5.2       0.8       0.7
## Davion Mitchell                  4.7       0.4       2.3       1.7       0.4
## Moses Moody                      1.6       2.0       3.8       1.0       0.7
##                    tov_per_g pts_per_g_college
## Lonzo Ball               2.5              14.6
## Meyers Leonard           1.4               7.7
## Wendell Carter Jr.       2.0              13.5
## Derrick Williams         2.3              17.8
## Davion Mitchell          2.3              12.0
## Moses Moody              1.6              16.8
# PCA on every statistic except `games`; scale.unit = TRUE standardises the
# variables and graph = FALSE suppresses FactoMineR's own plots.
df_cbb.pca <- PCA(
  select(df_cbb.sample, -games),
  scale.unit = TRUE,
  graph = FALSE
)

# Scree plot with the percentage labelled on each bar.
fviz_eig(
  df_cbb.pca,
  addlabels = TRUE,
  main = "Statistics Represented in Lower Dimensional Components"
)

# Variable plot, coloured by cos2.
fviz_pca_var(
  df_cbb.pca,
  col.var = "cos2",
  gradient.cols = c("lightblue", "black"),
  repel = TRUE
)

# Variable-level PCA results. The name `var` is also the name of
# stats::var(); it is kept because later code may refer to it.
var <- get_pca_var(df_cbb.pca)

# cos2 of each variable over the first two dimensions.
fviz_cos2(df_cbb.pca, choice = "var", axes = 1:2) +
  labs(title = "Quality of Representation to PCA Dimensions 1 and 2")

# Players plotted on the first two dimensions.
fviz_pca_ind(df_cbb.pca, repel = TRUE, labelsize = 1) +
  labs(title = "NBA Lottery Picks on PCA Dimensions 1 and 2")

summary(df_cbb.pca)
## 
## Call:
## PCA(X = select(df_cbb.sample, -games), scale.unit = TRUE, graph = FALSE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               7.916   5.341   2.094   1.495   1.261   1.201   1.002
## % of var.             31.663  21.364   8.376   5.978   5.045   4.803   4.008
## Cumulative % of var.  31.663  53.027  61.403  67.381  72.426  77.229  81.237
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.967   0.751   0.556   0.446   0.420   0.327   0.267
## % of var.              3.867   3.006   2.225   1.786   1.679   1.310   1.067
## Cumulative % of var.  85.104  88.110  90.335  92.121  93.800  95.110  96.177
##                       Dim.15  Dim.16  Dim.17  Dim.18  Dim.19  Dim.20  Dim.21
## Variance               0.258   0.213   0.164   0.141   0.072   0.062   0.024
## % of var.              1.032   0.850   0.655   0.563   0.286   0.249   0.097
## Cumulative % of var.  97.209  98.059  98.714  99.277  99.564  99.813  99.910
##                       Dim.22  Dim.23  Dim.24  Dim.25
## Variance               0.011   0.006   0.004   0.002
## % of var.              0.046   0.023   0.014   0.007
## Cumulative % of var.  99.955  99.979  99.993 100.000
## 
## Individuals (the 10 first)
##                        Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2  
## Lonzo Ball         |  6.220 | -0.362  0.010  0.003 | -0.139  0.002  0.001 |
## Meyers Leonard     |  5.490 |  2.912  0.649  0.281 | -3.535  1.418  0.415 |
## Wendell Carter Jr. |  4.044 |  3.305  0.836  0.668 |  0.783  0.070  0.038 |
## Derrick Williams   |  4.779 |  1.068  0.087  0.050 |  1.313  0.196  0.075 |
## Davion Mitchell    |  4.911 | -3.464  0.919  0.497 | -1.853  0.389  0.142 |
## Moses Moody        |  3.425 | -1.599  0.196  0.218 |  1.186  0.160  0.120 |
## Noah Vonleh        |  3.293 |  1.038  0.083  0.099 |  0.048  0.000  0.000 |
## Markieff Morris    |  4.397 |  1.608  0.198  0.134 | -3.641  1.505  0.686 |
## Joshua Primo       |  5.795 | -1.515  0.176  0.068 | -4.202  2.003  0.526 |
## Josh Jackson       |  4.144 |  1.320  0.133  0.101 |  3.011  1.029  0.528 |
##                     Dim.3    ctr   cos2  
## Lonzo Ball         -1.249  0.451  0.040 |
## Meyers Leonard      0.266  0.021  0.002 |
## Wendell Carter Jr.  0.907  0.238  0.050 |
## Derrick Williams    0.916  0.243  0.037 |
## Davion Mitchell    -1.563  0.707  0.101 |
## Moses Moody         1.641  0.780  0.230 |
## Noah Vonleh         0.352  0.036  0.011 |
## Markieff Morris     0.222  0.014  0.003 |
## Joshua Primo       -0.484  0.068  0.007 |
## Josh Jackson        0.003  0.000  0.000 |
## 
## Variables (the 10 first)
##                       Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3
## dunk_made          |  0.835  8.802  0.697 |  0.336  2.117  0.113 | -0.025
## dunk_attempts      |  0.829  8.675  0.687 |  0.338  2.140  0.114 | -0.025
## dunk_pct           |  0.334  1.412  0.112 | -0.120  0.269  0.014 |  0.228
## rim_made           |  0.584  4.305  0.341 |  0.672  8.468  0.452 | -0.088
## rim_attempts       |  0.451  2.571  0.203 |  0.740 10.243  0.547 | -0.093
## rim_pct            |  0.724  6.625  0.524 | -0.148  0.409  0.022 | -0.001
## rim_asted          |  0.767  7.441  0.589 | -0.354  2.351  0.126 |  0.182
## other2pt_made      |  0.106  0.141  0.011 |  0.601  6.772  0.362 |  0.560
## other2pt_attempts  |  0.102  0.131  0.010 |  0.651  7.924  0.423 |  0.484
## other2pt_pct       |  0.007  0.001  0.000 | -0.045  0.039  0.002 |  0.359
##                       ctr   cos2  
## dunk_made           0.029  0.001 |
## dunk_attempts       0.030  0.001 |
## dunk_pct            2.485  0.052 |
## rim_made            0.370  0.008 |
## rim_attempts        0.410  0.009 |
## rim_pct             0.000  0.000 |
## rim_asted           1.578  0.033 |
## other2pt_made      14.964  0.313 |
## other2pt_attempts  11.203  0.235 |
## other2pt_pct        6.140  0.129 |
# Second PCA with stats::prcomp(), centring and scaling each column.
# The scaling argument is spelled `scale.` (with the trailing dot): that is
# the formal's real name, and `scale = TRUE` only worked through partial
# argument matching. The result is unchanged.
# NOTE(review): unlike the PCA() call above, this input still contains the
# `games` column (hence 26 components rather than 25) -- confirm whether
# `games` should be dropped here as well.
pca2 <- prcomp(df_cbb.sample, center = TRUE, scale. = TRUE)
summary(pca2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.8233 2.3577 1.45399 1.29215 1.18882 1.10678 1.00292
## Proportion of Variance 0.3066 0.2138 0.08131 0.06422 0.05436 0.04711 0.03869
## Cumulative Proportion  0.3066 0.5204 0.60167 0.66589 0.72025 0.76736 0.80605
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.98328 0.87000 0.75739 0.66985 0.65034 0.64385 0.56902
## Proportion of Variance 0.03719 0.02911 0.02206 0.01726 0.01627 0.01594 0.01245
## Cumulative Proportion  0.84323 0.87234 0.89441 0.91166 0.92793 0.94388 0.95633
##                           PC15    PC16    PC17    PC18    PC19    PC20   PC21
## Standard deviation     0.51470 0.49095 0.41133 0.40183 0.37408 0.26745 0.2038
## Proportion of Variance 0.01019 0.00927 0.00651 0.00621 0.00538 0.00275 0.0016
## Cumulative Proportion  0.96652 0.97579 0.98230 0.98851 0.99389 0.99664 0.9982
##                          PC22    PC23    PC24    PC25    PC26
## Standard deviation     0.1527 0.10681 0.07652 0.05949 0.04172
## Proportion of Variance 0.0009 0.00044 0.00023 0.00014 0.00007
## Cumulative Proportion  0.9991 0.99957 0.99980 0.99993 1.00000
# Take the scores on the first two principal components, flip their sign,
# and store them as a data frame (row names are the player names).
df_cluster <- as.data.frame(-pca2$x[, c("PC1", "PC2")])
df_cluster
##                                  PC1           PC2
## Lonzo Ball               -0.30972840  0.0007053551
## Meyers Leonard            2.74024056  3.6508169349
## Wendell Carter Jr.        3.36498747 -0.7216499359
## Derrick Williams          1.03830203 -1.0304451116
## Davion Mitchell          -3.51406172  1.6468468321
## Moses Moody              -1.46960252 -1.4190061757
## Noah Vonleh               1.11130925 -0.1751834144
## Markieff Morris           1.29699896  4.0373945397
## Joshua Primo             -1.55551858  3.7707023835
## Josh Jackson              1.48957168 -3.0475464620
## Willie Cauley-Stein       2.65175786  4.0213075524
## De'Aaron Fox             -0.87066774 -3.3180112387
## Brandon Miller           -2.00076132 -1.9889065138
## Coby White               -3.35981962 -0.9338304491
## Miles Bridges            -0.51099544  0.8957909493
## CJ McCollum              -3.90112719 -0.5500261983
## Nerlens Noel              5.45383914 -0.7991793564
## Steven Adams              3.66839570  2.7680962536
## Denzel Valentine         -3.08887719  3.2154062834
## Cody Zeller               2.02496946 -0.5680230150
## Zion Williamson           5.07447989 -4.6504795504
## Cameron Johnson          -1.47106716  2.1704255345
## Keegan Murray             1.59473589  0.7127863270
## Kendall Marshall         -4.60320232  1.8524037126
## Collin Sexton            -2.52183153 -3.9991906129
## Kyrie Irving             -3.77122159 -3.0033308087
## Cade Cunningham          -2.90919016 -4.4345298643
## Stanley Johnson          -1.16931864 -1.1296582673
## Malik Monk               -2.28778612 -1.6797126865
## Anthony Davis             6.73630616 -1.2629548886
## Markelle Fultz           -2.05054203 -6.2356705555
## Julius Randle             2.20418418 -3.0357101236
## Jett Howard              -2.91653599  1.5725624278
## Mo Bamba                  5.12168936 -0.6472841174
## Rui Hachimura             0.89990828  2.6695528608
## Jalen Duren               5.98165537 -0.5776256581
## Jabari Smith Jr.         -1.62886867 -1.4477810399
## Jaylen Brown             -0.86593793 -2.2952001302
## Michael Kidd-Gilchrist    1.49000302 -0.5168492854
## Anthony Black            -1.20227004 -1.7148572319
## Zach LaVine              -1.60422247  2.6481909219
## Jimmer Fredette          -4.12885438  0.1532925420
## Jordan Hawkins           -3.13721836  2.6918367917
## Wes Johnson               1.50316126 -1.3047269862
## Marvin Bagley III         5.65427831 -4.5448753922
## Aaron Gordon              2.71643469 -0.5168844660
## Romeo Langford           -0.73466257 -2.2444498661
## RJ Barrett               -0.08315247 -5.0528618008
## Cason Wallace            -2.34104930  0.2152815400
## Patrick Patterson         2.53963602  2.1109010563
## Evan Turner              -1.49673522 -0.4578015058
## Ekpe Udoh                 2.87709283 -2.0867754797
## Jabari Parker             2.40112005 -3.8466488862
## Donovan Mitchell         -3.15753551  1.8102604526
## Dereck Lively II          5.06532407  4.9338352381
## Buddy Hield              -3.54777620  1.4535258538
## Derrick Favors            5.01881131 -0.5372658952
## Ben McLemore              0.24239908 -0.1870327074
## Franz Wagner             -1.24385093  2.2215940450
## Onyeka Okongwu            5.79890613 -2.6047417619
## Austin Rivers            -2.95329868 -1.3791937733
## Myles Turner              1.86401362  1.8030642874
## D'Angelo Russell         -2.98270993 -2.6314533501
## Harrison Barnes          -1.11071750  0.4675508975
## Isaac Okoro               0.42993503 -0.2975331821
## Patrick Williams         -0.12807727  1.3341060184
## Mikal Bridges            -1.20162590  3.5903451628
## Shai Gilgeous-Alexander  -2.04551424 -2.3234644917
## Jarrett Culver           -1.51221150  0.0681535683
## Jayson Tatum             -0.78010288 -2.6444271533
## Brandon Knight           -3.54151495 -2.0093567331
## Lauri Markkanen          -0.18748839  0.1368727294
## Domantas Sabonis          1.67699707  1.2190984214
## Andrew Wiggins            0.36542307 -2.6025198229
## Jamal Murray             -2.37633667 -1.7128111826
## Paul George              -2.10747433  0.1876406410
## Xavier Henry             -1.24518982  0.8915202286
## Deandre Ayton             6.32878662 -3.9424880892
## Johnny Davis             -1.33669032  0.1478916084
## Gradey Dick              -1.44740283  1.2927758040
## Cameron Payne            -4.48285978 -1.3236954751
## Ziaire Williams          -1.78971475  0.2486334187
## Otto Porter Jr.           0.04885497  1.8356037020
## Scottie Barnes           -0.41005879  0.0002825293
## Ochai Agbaji             -1.65972813  3.1665060860
## Jeremy Lamb              -0.50899046  2.6527408299
## Jerome Robinson          -3.05834301  0.2586574385
## T.J. Warren               1.08912829 -0.3669642309
## Jaden Ivey               -1.86477689  0.0773337674
## Ben Simmons               3.55778734 -6.3959006442
## Damian Lillard           -4.29694802 -0.3843340708
## P.J. Washington           0.33949853  1.0835362435
## Trey Burke               -3.60982350 -0.5620071724
## John Wall                -1.73494610 -3.9455798842
## Luke Kennard             -2.76829877  0.9772815337
## Jalen Suggs              -1.57680464 -1.3826079667
## Alex Len                  3.60602390  2.2253867363
## De'Andre Hunter          -0.77514281  2.0904369415
## Dennis Smith Jr.         -2.43263222 -4.0080211081
## Brandon Ingram           -1.66287033 -1.8607679236
## Marcus Smart             -2.59113529 -1.9709358714
## Aaron Nesmith            -3.28346405  1.6855751483
## Doug McDermott           -1.01124840  1.4023518631
## Marcus Morris             0.73895771  2.6323130400
## Jonathan Isaac            1.48927680  0.4512240082
## Justise Winslow          -0.36808543 -0.0140660045
## Evan Mobley               4.26687599 -2.6519044433
## Bam Adebayo               5.99686501 -1.1625539440
## Shabazz Muhammad          0.36507564 -2.1338042145
## Cole Aldrich              2.78307370  3.9300845772
## Jaren Jackson Jr.         1.49089242  1.7016335898
## Anthony Bennett           3.08683149 -0.9658417837
## Victor Oladipo           -0.63903222  2.5888631078
## Kris Dunn                -2.69748838  0.0951825819
## John Henson               2.97155356  3.1904829161
## Karl-Anthony Towns        2.08103954  1.1196065654
## Klay Thompson            -3.84131304  0.2800845197
## Nik Stauskas             -3.13687982  1.8802991525
## Kelly Olynyk              0.73492325  3.8811181762
## Jakob Poeltl              3.07463033  1.0055357287
## Devin Vassell            -0.72773722  4.1046446591
## Marquese Chriss           2.77746275 -0.4521718014
## Cam Reddish              -4.14985479  0.0842589880
## Kemba Walker             -3.18777727 -0.5896508288
## Jalen Williams           -1.80913049  1.6958213307
## Al-Farouq Aminu           1.02295866 -0.6557701582
## Kevin Knox               -0.63604976 -0.7501807107
## Anthony Edwards          -1.40251473 -2.6178813170
## Greg Monroe               0.55772278 -0.6192425231
## Tristan Thompson          4.60556911 -2.1556621458
## James Bouknight          -1.52674532  0.8368198759
## Trae Young               -6.66738911 -7.9875515113
## Terrence Ross            -1.18817176  2.5860223513
## Jalen Smith               2.10897015  1.8201712718
## Jarace Walker             1.30148259  1.2591517315
## Taylor Hendricks          1.15461505  0.2239571117
## DeMarcus Cousins          4.42466023 -2.5278733794
## Gordon Hayward           -0.68966882  0.8690154013
## Obi Toppin                3.01791562  0.9098400603
## Alec Burks               -0.74320461 -2.0264013166
## Kentavious Caldwell-Pope -2.25315004  0.8810443210
## Zach Collins              2.23468578  2.2257336106
## Chet Holmgren             4.15750678  0.0605690899
## Taurean Prince           -1.44792106  3.7467662602
## Ed Davis                  3.31971345  3.1931177330
## Trey Lyles                2.26213434  2.3189123450
## Andre Drummond            5.95363392  0.5511351054
## Devin Booker             -1.28579904  3.5063787159
## Joel Embiid               4.49751364  0.3052975967
## Frank Kaminsky           -0.20730366  4.1181677131
## Jeremy Sochan             0.86229364  1.9652843807
## Kira Lewis Jr.           -3.26278428 -0.0954317440
## Michael Carter-Williams  -2.91272861  0.8684079705
## Chris Duarte             -1.92568686  1.3515954340
## Bennedict Mathurin       -1.07663046  1.0805356809
## Jahlil Okafor             5.47754196 -2.7772955807
## Ja Morant                -2.49001353 -3.1186017067
## Dion Waiters             -1.92316128  2.9143303356
## Paolo Banchero            0.71365922 -2.7656057765
## Elfrid Payton            -2.00506164 -0.8904543832
## Bradley Beal             -0.88415187 -0.6225122311
## Thomas Robinson           0.41276682  2.4752367530
## Jaxson Hayes              6.10602196  1.9333235932
## Tyrese Haliburton        -2.28374305  2.7467761148
## Tyler Herro              -1.81846173  0.4356215772
# Diagnostics for choosing the number of clusters on df_cluster: elbow
# (within-cluster sum of squares), average silhouette width, and gap statistic.
fviz_nbclust(x = df_cluster, FUNcluster = kmeans, method = "wss")

fviz_nbclust(x = df_cluster, FUNcluster = kmeans, method = "silhouette")

fviz_nbclust(x = df_cluster, FUNcluster = kmeans, method = "gap_stat")

# Fit k-means at three granularities (15, 10 and 5 clusters); each fit keeps
# the best of 50 random starts.
k <- 15
df_cbb.kmeans <- kmeans(x = df_cluster, centers = k, nstart = 50)
df_cbb.kmeans2 <- kmeans(x = df_cluster, centers = 10, nstart = 50)
df_cbb.kmeans3 <- kmeans(x = df_cluster, centers = 5, nstart = 50)

# Show the 15-cluster fit (sizes, centers, assignments, within-cluster SS).
df_cbb.kmeans
## K-means clustering with 15 clusters of sizes 14, 10, 10, 1, 13, 15, 15, 12, 8, 6, 8, 9, 24, 15, 5
## 
## Cluster means:
##           PC1        PC2
## 1  -3.0038961  1.6844905
## 2  -0.7038180 -2.2727211
## 3   5.5023843 -0.2136538
## 4  -6.6673891 -7.9875515
## 5  -3.5629264 -0.2578629
## 6  -2.3589000 -1.9212256
## 7  -1.3010475  2.9669816
## 8   2.1039278  1.5172029
## 9   3.3507603  3.4891410
## 10 -1.9553824 -4.6126423
## 11  0.5984250  2.9518339
## 12  5.0209873 -3.5834690
## 13 -0.9536002  0.3927310
## 14  1.6571399 -0.5010737
## 15  1.9371256 -2.9564573
## 
## Clustering vector:
##               Lonzo Ball           Meyers Leonard       Wendell Carter Jr. 
##                       13                        9                       14 
##         Derrick Williams          Davion Mitchell              Moses Moody 
##                       14                        1                        6 
##              Noah Vonleh          Markieff Morris             Joshua Primo 
##                       14                       11                        7 
##             Josh Jackson      Willie Cauley-Stein             De'Aaron Fox 
##                       15                        9                        2 
##           Brandon Miller               Coby White            Miles Bridges 
##                        6                        5                       13 
##              CJ McCollum             Nerlens Noel             Steven Adams 
##                        5                        3                        9 
##         Denzel Valentine              Cody Zeller          Zion Williamson 
##                        1                       14                       12 
##          Cameron Johnson            Keegan Murray         Kendall Marshall 
##                        7                        8                        1 
##            Collin Sexton             Kyrie Irving          Cade Cunningham 
##                       10                        6                       10 
##          Stanley Johnson               Malik Monk            Anthony Davis 
##                        2                        6                        3 
##           Markelle Fultz            Julius Randle              Jett Howard 
##                       10                       15                        1 
##                 Mo Bamba            Rui Hachimura              Jalen Duren 
##                        3                       11                        3 
##         Jabari Smith Jr.             Jaylen Brown   Michael Kidd-Gilchrist 
##                        6                        2                       14 
##            Anthony Black              Zach LaVine          Jimmer Fredette 
##                        2                        7                        5 
##           Jordan Hawkins              Wes Johnson        Marvin Bagley III 
##                        1                       14                       12 
##             Aaron Gordon           Romeo Langford               RJ Barrett 
##                       14                        2                       10 
##            Cason Wallace        Patrick Patterson              Evan Turner 
##                        5                        8                       13 
##                Ekpe Udoh            Jabari Parker         Donovan Mitchell 
##                       15                       15                        1 
##         Dereck Lively II              Buddy Hield           Derrick Favors 
##                        9                        1                        3 
##             Ben McLemore             Franz Wagner           Onyeka Okongwu 
##                       13                        7                       12 
##            Austin Rivers             Myles Turner         D'Angelo Russell 
##                        6                        8                        6 
##          Harrison Barnes              Isaac Okoro         Patrick Williams 
##                       13                       14                       13 
##            Mikal Bridges  Shai Gilgeous-Alexander           Jarrett Culver 
##                        7                        6                       13 
##             Jayson Tatum           Brandon Knight          Lauri Markkanen 
##                        2                        6                       13 
##         Domantas Sabonis           Andrew Wiggins             Jamal Murray 
##                        8                        2                        6 
##              Paul George             Xavier Henry            Deandre Ayton 
##                       13                       13                       12 
##             Johnny Davis              Gradey Dick            Cameron Payne 
##                       13                       13                        5 
##          Ziaire Williams          Otto Porter Jr.           Scottie Barnes 
##                       13                       11                       13 
##             Ochai Agbaji              Jeremy Lamb          Jerome Robinson 
##                        7                        7                        5 
##              T.J. Warren               Jaden Ivey              Ben Simmons 
##                       14                       13                       12 
##           Damian Lillard          P.J. Washington               Trey Burke 
##                        5                       13                        5 
##                John Wall             Luke Kennard              Jalen Suggs 
##                       10                        1                        6 
##                 Alex Len          De'Andre Hunter         Dennis Smith Jr. 
##                        9                        7                       10 
##           Brandon Ingram             Marcus Smart            Aaron Nesmith 
##                        6                        6                        1 
##           Doug McDermott            Marcus Morris           Jonathan Isaac 
##                       13                       11                       14 
##          Justise Winslow              Evan Mobley              Bam Adebayo 
##                       13                       12                        3 
##         Shabazz Muhammad             Cole Aldrich        Jaren Jackson Jr. 
##                        2                        9                        8 
##          Anthony Bennett           Victor Oladipo                Kris Dunn 
##                       14                        7                        5 
##              John Henson       Karl-Anthony Towns            Klay Thompson 
##                        9                        8                        5 
##             Nik Stauskas             Kelly Olynyk             Jakob Poeltl 
##                        1                       11                        8 
##            Devin Vassell          Marquese Chriss              Cam Reddish 
##                        7                       14                        5 
##             Kemba Walker           Jalen Williams          Al-Farouq Aminu 
##                        5                        1                       14 
##               Kevin Knox          Anthony Edwards              Greg Monroe 
##                       13                        2                       14 
##         Tristan Thompson          James Bouknight               Trae Young 
##                       12                       13                        4 
##            Terrence Ross              Jalen Smith            Jarace Walker 
##                        7                        8                        8 
##         Taylor Hendricks         DeMarcus Cousins           Gordon Hayward 
##                       14                       12                       13 
##               Obi Toppin               Alec Burks Kentavious Caldwell-Pope 
##                        8                        2                        1 
##             Zach Collins            Chet Holmgren           Taurean Prince 
##                        8                        3                        7 
##                 Ed Davis               Trey Lyles           Andre Drummond 
##                        9                        8                        3 
##             Devin Booker              Joel Embiid           Frank Kaminsky 
##                        7                        3                       11 
##            Jeremy Sochan           Kira Lewis Jr.  Michael Carter-Williams 
##                       11                        5                        1 
##             Chris Duarte       Bennedict Mathurin            Jahlil Okafor 
##                        1                       13                       12 
##                Ja Morant             Dion Waiters           Paolo Banchero 
##                        6                        7                       15 
##            Elfrid Payton             Bradley Beal          Thomas Robinson 
##                        6                       13                       11 
##             Jaxson Hayes        Tyrese Haliburton              Tyler Herro 
##                        3                        7                       13 
## 
## Within cluster sum of squares by cluster:
##  [1] 11.914071  6.459357 14.072811  0.000000  8.126588 12.497657  9.151351
##  [8]  6.825062  9.318869  9.067874  7.703117 21.895704 19.720030 14.760289
## [15]  4.467114
##  (between_SS / total_SS =  93.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Plot the 15-cluster fit over df_cluster with repelled point labels and no
# cluster-centre markers; scale_y_reverse() flips the vertical axis.
fviz_cluster(
  df_cbb.kmeans,
  data = df_cluster,
  labelsize = 4,
  pointsize = 1,
  show.clust.cent = FALSE,
  repel = TRUE,
  xlab = "Dimension 1",
  ylab = "Dimension 2",
  main = "Clustering with K-means"
) +
  scale_y_reverse()

# Named cluster-id vectors (names = players) from the three k-means fits on
# df_cluster (k = 15, 10 and 5).
cluster_assignments <- df_cbb.kmeans$cluster
cluster_assignments2 <- df_cbb.kmeans2$cluster
cluster_assignments3 <- df_cbb.kmeans3$cluster

# One lookup table per fit with columns `name` and `pc_cluster`.
# The tibbles are built directly in their final shape instead of going through
# data.frame() + as.tibble() + rename() + relocate(): as.tibble() has been
# deprecated since tibble 2.0.0 and warns on use. unname() keeps the id column
# a plain integer vector, as data.frame() did.
cluster_df15 <- tibble(
  name = names(cluster_assignments),
  pc_cluster = unname(cluster_assignments)
)
cluster_df10 <- tibble(
  name = names(cluster_assignments2),
  pc_cluster = unname(cluster_assignments2)
)
cluster_df5 <- tibble(
  name = names(cluster_assignments3),
  pc_cluster = unname(cluster_assignments3)
)

cluster_df15
## # A tibble: 165 × 2
##    name               pc_cluster
##    <chr>                   <int>
##  1 Lonzo Ball                 13
##  2 Meyers Leonard              9
##  3 Wendell Carter Jr.         14
##  4 Derrick Williams           14
##  5 Davion Mitchell             1
##  6 Moses Moody                 6
##  7 Noah Vonleh                14
##  8 Markieff Morris            11
##  9 Joshua Primo                7
## 10 Josh Jackson               15
## # ℹ 155 more rows
# Attach the 15-cluster assignment to the per-player stats as `group` and move
# it in front of the stat columns.
# NOTE(review): the assignment is positional, so this assumes df_cbb.sample's
# rows are in the same order as the rows that were clustered - confirm.
df_cbb.sample[["group"]] <- df_cbb.kmeans$cluster
df_cbb.sample <- relocate(df_cbb.sample, group, .before = dunk_made)
head(df_cbb.sample)
##                    group  dunk_made dunk_attempts dunk_pct rim_made
## Lonzo Ball            13 1.02777778    1.11111111    0.925 2.611111
## Meyers Leonard         9 0.78461538    0.86153846    0.911 1.676923
## Wendell Carter Jr.    14 1.51351351    1.56756757    0.966 3.243243
## Derrick Williams      14 0.81159420    0.86956522    0.933 1.956522
## Davion Mitchell        1 0.03333333    0.03333333    1.000 1.166667
## Moses Moody            6 0.31250000    0.34375000    0.909 1.937500
##                    rim_attempts rim_pct rim_asted other2pt_made
## Lonzo Ball             3.333333   0.783     0.511     0.4166667
## Meyers Leonard         2.215385   0.757     0.798     0.9230769
## Wendell Carter Jr.     4.729730   0.686     0.608     1.0810811
## Derrick Williams       2.724638   0.718     0.585     0.7101449
## Davion Mitchell        1.850000   0.631     0.186     0.4166667
## Moses Moody            3.437500   0.564     0.371     1.4062500
##                    other2pt_attempts other2pt_pct other2pt_asted fg2_pct
## Lonzo Ball                 0.8055556        0.517          0.200   0.732
## Meyers Leonard             2.0923077        0.441          0.500   0.585
## Wendell Carter Jr.         2.6216216        0.412          0.500   0.586
## Derrick Williams           1.7101449        0.415          0.388   0.598
## Davion Mitchell            0.9500000        0.439          0.080   0.519
## Moses Moody                3.5625000        0.395          0.311   0.478
##                    fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g
## Lonzo Ball               2.2        5.4     0.738     0.4074074    36      1.8
## Meyers Leonard           0.0        0.2     1.000     0.0000000    65      1.6
## Wendell Carter Jr.       0.5        1.2     0.947     0.4166667    37      3.4
## Derrick Williams         0.7        1.3     0.929     0.5384615    69      5.9
## Davion Mitchell          1.6        4.1     0.603     0.3902439    60      1.6
## Moses Moody              1.8        5.1     0.897     0.3529412    32      4.7
##                    fta_per_g ast_per_g_college orb_per_g drb_per_g stl_per_g
## Lonzo Ball               2.7               7.6       0.9       5.1       1.8
## Meyers Leonard           2.2               0.7       1.3       3.4       0.3
## Wendell Carter Jr.       4.5               2.0       2.9       6.1       0.8
## Derrick Williams         8.2               0.9       2.5       5.2       0.8
## Davion Mitchell          2.4               4.7       0.4       2.3       1.7
## Moses Moody              5.8               1.6       2.0       3.8       1.0
##                    blk_per_g tov_per_g pts_per_g_college
## Lonzo Ball               0.8       2.5              14.6
## Meyers Leonard           1.1       1.4               7.7
## Wendell Carter Jr.       2.1       2.0              13.5
## Derrick Williams         0.7       2.3              17.8
## Davion Mitchell          0.4       2.3              12.0
## Moses Moody              0.7       1.6              16.8
# Mean of every stat within each cluster; print all 15 rows and every column.
df_cbb.sample |>
  group_by(group) |>
  summarize(across(.cols = everything(), .fns = mean)) |>
  print(n = 15, width = Inf)
## # A tibble: 15 × 27
##    group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <int>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>
##  1     1     0.154         0.176    0.816    0.899         1.44   0.623
##  2     2     0.620         0.683    0.904    2.60          4.07   0.639
##  3     3     2.06          2.16     0.959    3.51          4.49   0.787
##  4     4     0             0        0        3.28          6.28   0.522
##  5     5     0.126         0.142    0.904    1.08          1.75   0.615
##  6     6     0.318         0.348    0.862    2.09          3.38   0.626
##  7     7     0.271         0.301    0.914    1.04          1.55   0.682
##  8     8     0.728         0.781    0.925    2.10          2.86   0.738
##  9     9     0.784         0.833    0.943    1.49          2.04   0.745
## 10    10     0.600         0.657    0.868    3.20          5.16   0.620
## 11    11     0.380         0.431    0.906    1.34          1.86   0.727
## 12    12     2.02          2.14     0.943    4.93          6.48   0.760
## 13    13     0.487         0.536    0.913    1.69          2.53   0.670
## 14    14     0.978         1.07     0.921    2.65          3.88   0.682
## 15    15     1.24          1.36     0.914    3.20          4.78   0.675
##    rim_asted other2pt_made other2pt_attempts other2pt_pct other2pt_asted fg2_pct
##        <dbl>         <dbl>             <dbl>        <dbl>          <dbl>   <dbl>
##  1     0.288         0.590              1.47        0.401         0.168    0.496
##  2     0.358         1.52               4.21        0.360         0.200    0.502
##  3     0.634         0.905              2.59        0.357         0.469    0.630
##  4     0.114         1.19               2.78        0.427         0.026    0.493
##  5     0.232         0.850              2.20        0.383         0.194    0.480
##  6     0.272         1.24               3.32        0.368         0.138    0.499
##  7     0.468         0.616              1.50        0.397         0.296    0.546
##  8     0.576         0.894              2.16        0.414         0.503    0.6  
##  9     0.705         0.636              1.65        0.351         0.633    0.581
## 10     0.247         1.80               4.82        0.362         0.0985   0.501
## 11     0.589         0.717              1.69        0.432         0.437    0.581
## 12     0.539         1.63               3.99        0.415         0.369    0.622
## 13     0.429         0.916              2.44        0.371         0.282    0.528
## 14     0.502         0.989              2.65        0.364         0.403    0.564
## 15     0.420         1.98               5.22        0.376         0.292    0.52 
##    fg3_per_g fg3a_per_g fg3_asted fg3_pct_per_g games ft_per_g fta_per_g
##        <dbl>      <dbl>     <dbl>         <dbl> <dbl>    <dbl>     <dbl>
##  1    1.85        4.91      0.724        0.374   73.5     2.37      3.01
##  2    1.13        3.53      0.750        0.320   37.2     4.32      5.79
##  3    0.19        0.58      0.364        0.0888  32.3     2.62      4.11
##  4    3.7        10.3       0.263        0.359   32       7.4       8.6 
##  5    1.9         5.18      0.634        0.365   81.4     3.55      4.45
##  6    1.87        4.9       0.667        0.375   41.8     4.04      5.22
##  7    1.4         3.69      0.870        0.379   73.3     1.75      2.27
##  8    0.508       1.43      0.807        0.345   54.5     2.72      3.7 
##  9    0.0125      0.125     0.375        0.0312  72.1     1.62      2.7 
## 10    1.72        4.8       0.546        0.355   32       4.72      6.43
## 11    0.45        1.31      0.935        0.414   95.5     2.31      3.35
## 12    0.233       0.733     0.624        0.149   34.1     4.01      6.38
## 13    1.47        3.97      0.809        0.366   53.9     2.98      3.88
## 14    0.673       1.88      0.834        0.325   45.4     3.43      4.9 
## 15    0.7         2.02      0.921        0.314   37       3.82      5.5 
##    ast_per_g_college orb_per_g drb_per_g stl_per_g blk_per_g tov_per_g
##                <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
##  1              2.96     0.85       3.24     1.19      0.414      1.77
##  2              2.41     1.56       4.03     1.23      0.58       2.43
##  3              1.05     2.73       5.84     0.98      2.97       1.75
##  4              8.7      0.4        3.5      1.7       0.3        5.2 
##  5              3.95     0.715      3.34     1.55      0.415      2.65
##  6              3.67     1.17       4.1      1.37      0.553      2.58
##  7              1.76     1.06       2.99     1.06      0.493      1.35
##  8              1.17     2.17       4.9      0.633     1.64       1.54
##  9              0.8      2.28       4.39     0.538     2.09       1.26
## 10              5        1.03       4.32     1.43      0.567      3.43
## 11              1.32     1.79       4.08     0.812     0.712      1.39
## 12              1.9      3.49       6.14     1.13      1.84       2.3 
## 13              2.28     1.36       4.38     1.10      0.608      2.07
## 14              1.53     2.47       5.08     1.07      1.29       2.04
## 15              2.3      2.82       6        1.04      1.54       2.48
##    pts_per_g_college
##                <dbl>
##  1             13.0 
##  2             16.4 
##  3             12.0 
##  4             27.4 
##  5             16.6 
##  6             17.2 
##  7             11.1 
##  8             12.5 
##  9              8.34
## 10             20.0 
## 11             10.5 
## 12             17.9 
## 13             14.4 
## 14             14.4 
## 15             16.3
# Median of every stat within each cluster (default tibble printing, so wide
# output is truncated to the columns that fit).
df_cbb.sample |>
  group_by(group) |>
  summarize(across(.cols = everything(), .fns = median))
## # A tibble: 15 × 27
##    group dunk_made dunk_attempts dunk_pct rim_made rim_attempts rim_pct
##    <int>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>   <dbl>
##  1     1     0.140         0.202    0.904    0.943         1.43   0.627
##  2     2     0.619         0.723    0.923    2.68          4.07   0.641
##  3     3     2.24          2.31     0.958    3.63          4.56   0.776
##  4     4     0             0        0        3.28          6.28   0.522
##  5     5     0.123         0.123    1        1.04          1.65   0.609
##  6     6     0.3           0.333    0.903    2             3.25   0.634
##  7     7     0.302         0.32     0.902    1.08          1.52   0.692
##  8     8     0.718         0.767    0.932    2.03          2.85   0.728
##  9     9     0.721         0.783    0.954    1.34          1.78   0.741
## 10    10     0.624         0.644    0.925    3.13          4.98   0.629
## 11    11     0.358         0.381    0.926    1.34          1.82   0.735
## 12    12     1.94          2.08     0.951    4.82          6.64   0.762
## 13    13     0.473         0.520    0.907    1.69          2.59   0.663
## 14    14     0.85          0.9      0.933    2.59          3.69   0.686
## 15    15     1.05          1.13     0.925    3.3           4.92   0.676
## # ℹ 20 more variables: rim_asted <dbl>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## #   fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## #   ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## #   blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>
# Career line for a single player: draft slot, draft year and headline stats.
df_career_stats |>
  select(
    player, pick_overall, year,
    pts_per_g, trb_per_g, ast_per_g, vorp, g
  ) |>
  filter(player == "Jaylen Brown")
## # A tibble: 1 × 8
##   player       pick_overall  year pts_per_g trb_per_g ast_per_g  vorp     g
##   <chr>               <dbl> <dbl>     <dbl>     <dbl>     <dbl> <dbl> <dbl>
## 1 Jaylen Brown            3  2016      18.6       5.3       2.4   9.7   540
# Average career production by draft slot for players with year >= 2010.
# Only the five reported numeric columns are averaged: applying mean() to every
# column also ran it on the character columns (e.g. player, college_name),
# which returns NA with a warning for each group and whose results were
# discarded anyway.
df_career_stats |>
  filter(year >= 2010) |>
  group_by(pick_overall) |>
  summarize(across(c(pts_per_g, trb_per_g, ast_per_g, vorp, g), mean))
## # A tibble: 61 × 6
##    pick_overall pts_per_g trb_per_g ast_per_g  vorp     g
##           <dbl>     <dbl>     <dbl>     <dbl> <dbl> <dbl>
##  1            1     18.9       6.65      4.28 14.2   380.
##  2            2     14.7       5.14      3.19  3.91  338.
##  3            3     17.4       6.58      3.31 12.0   420.
##  4            4     12         5.32      1.91  3     372.
##  5            5     12.5       4.61      3.36  4.84  377.
##  6            6      9.92      4.5       2.17  6.57  346.
##  7            7     12.4       5.08      2.51  3.77  430.
##  8            8      9.21      3.31      1.91  1.71  390.
##  9            9     10.3       4.76      2.31  5.61  418.
## 10           10      9.74      3.46      2.13  5.14  370.
## # ℹ 51 more rows
# Copy of df_cbb_scaled keyed by player: the names come from df_cbb's row
# names, are added as a `name` column, and are then moved into the row names.
df_cluster2 <- df_cbb_scaled |>
  mutate(name = rownames(df_cbb)) |>
  relocate(name, .before = dunk_made) |>
  column_to_rownames(var = "name")

# Cluster counts for the three k-means fits on df_cluster2.
k1 <- 15
k2 <- 10
k3 <- 5

# Each fit keeps the best of 50 random starts.
k15 <- kmeans(x = df_cluster2, centers = k1, nstart = 50)
k10 <- kmeans(x = df_cluster2, centers = k2, nstart = 50)
k5 <- kmeans(x = df_cluster2, centers = k3, nstart = 50)

temp_assign <- k15$cluster

#' Add the cluster ids of a k-means fit to a player/pc_cluster lookup table.
#'
#' @param df A data frame or tibble with a `name` column (the join key) and a
#'   `pc_cluster` column.
#' @param kmeans A fitted k-means object, i.e. a list whose `cluster` element
#'   is a vector of cluster ids named by player (as returned by
#'   `stats::kmeans()` on data with row names).
#' @return `df` with an added `all_cluster` column holding the ids from
#'   `kmeans`, matched on `name`, and with `pc_cluster` placed immediately
#'   after `all_cluster`. Rows of `df` without a match get `NA`.
combine <- function(df, kmeans) {
  assignments <- kmeans$cluster
  # Without names there is nothing to join on; fail here rather than inside
  # left_join() with a less obvious message.
  stopifnot(!is.null(names(assignments)))

  # Built directly as a tibble: as.tibble() is deprecated since tibble 2.0.0.
  all_clusters <- tibble(
    all_cluster = unname(assignments),
    name = names(assignments)
  )

  df |>
    left_join(all_clusters, by = "name") |>
    relocate(pc_cluster, .after = all_cluster)
}

# Pair the pc_cluster ids with the ids from the fits on df_cluster2 for each
# k. Only the 15-cluster table is sorted (by pc_cluster, then all_cluster).
groups15 <- arrange(
  combine(df = cluster_df15, kmeans = k15),
  pc_cluster, all_cluster
)
groups10 <- combine(df = cluster_df10, kmeans = k10)
groups5 <- combine(df = cluster_df5, kmeans = k5)

groups15
## # A tibble: 165 × 3
##    name                    all_cluster pc_cluster
##    <chr>                         <int>      <int>
##  1 Michael Carter-Williams           1          1
##  2 Kendall Marshall                  5          1
##  3 Denzel Valentine                  8          1
##  4 Jett Howard                       8          1
##  5 Jordan Hawkins                    8          1
##  6 Donovan Mitchell                  8          1
##  7 Buddy Hield                       8          1
##  8 Aaron Nesmith                     8          1
##  9 Nik Stauskas                      8          1
## 10 Jalen Williams                    8          1
## # ℹ 155 more rows
# Expose df_cbb's row names as a `name` column so the cluster ids in groups15
# can be joined on, then move the names back into the row names.
df_cbb <- rownames_to_column(df_cbb, var = "name")
df_groups <- left_join(df_cbb, groups15, by = "name")
df_cbb <- column_to_rownames(df_cbb, var = "name")
library(ggforce)
# Flag players that appear in bust_list / good_list (1 = listed, 0 = not).
df_groups <- df_groups |>
  mutate(
    bust = if_else(name %in% bust_list, 1, 0),
    good = if_else(name %in% good_list, 1, 0)
  )

# Cluster size and per-cluster means, grouped by pc_cluster.
# where(is.numeric) keeps mean() away from the character `name` column, which
# otherwise yields NA plus one warning per group; since `name` is no longer
# summarised it does not have to be dropped afterwards.
df_groups |>
  group_by(pc_cluster) |>
  summarize(
    n = n(),
    across(where(is.numeric), mean)
  )
## # A tibble: 15 × 31
##    pc_cluster     n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
##         <int> <dbl>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>
##  1          1    14     0.154         0.176    0.816    0.899         1.44
##  2          2    10     0.620         0.683    0.904    2.60          4.07
##  3          3    10     2.06          2.16     0.959    3.51          4.49
##  4          4     1     0             0        0        3.28          6.28
##  5          5    13     0.126         0.142    0.904    1.08          1.75
##  6          6    15     0.318         0.348    0.862    2.09          3.38
##  7          7    15     0.271         0.301    0.914    1.04          1.55
##  8          8    12     0.728         0.781    0.925    2.10          2.86
##  9          9     8     0.784         0.833    0.943    1.49          2.04
## 10         10     6     0.600         0.657    0.868    3.20          5.16
## 11         11     8     0.380         0.431    0.906    1.34          1.86
## 12         12     9     2.02          2.14     0.943    4.93          6.48
## 13         13    24     0.487         0.536    0.913    1.69          2.53
## 14         14    15     0.978         1.07     0.921    2.65          3.88
## 15         15     5     1.24          1.36     0.914    3.20          4.78
## # ℹ 24 more variables: rim_pct <dbl>, rim_asted <dbl>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## #   fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## #   ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## #   blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>,
## #   all_cluster <dbl>, bust <dbl>, good <dbl>
# Cluster size and per-cluster means, grouped by all_cluster.
# where(is.numeric) keeps mean() away from the character `name` column, which
# otherwise yields NA plus one warning per group; since `name` is no longer
# summarised it does not have to be dropped afterwards.
df_groups |>
  group_by(all_cluster) |>
  summarize(
    n = n(),
    across(where(is.numeric), mean)
  )
## # A tibble: 15 × 31
##    all_cluster     n dunk_made dunk_attempts dunk_pct rim_made rim_attempts
##          <int> <dbl>     <dbl>         <dbl>    <dbl>    <dbl>        <dbl>
##  1           1    15    0.433         0.481     0.902     1.98         3.01
##  2           2     9    0.543         0.589     0.896     3.14         5.06
##  3           3     3    2.12          2.17      0.979     3.57         4.34
##  4           4    22    0.684         0.743     0.920     1.88         2.70
##  5           5     2    0             0         0         1.42         2.14
##  6           6     1    0             0         0         3.28         6.28
##  7           7     4    2.30          2.44      0.942     5.86         7.67
##  8           8    22    0.263         0.294     0.887     1.10         1.78
##  9           9    14    1.28          1.39      0.925     3.13         4.45
## 10          10     7    0.411         0.445     0.934     1.88         2.72
## 11          11    10    1.96          2.09      0.941     3.73         5.00
## 12          12    12    0.0783        0.0854    0.932     1.11         1.83
## 13          13    14    0.376         0.430     0.896     1.23         1.72
## 14          14    19    0.466         0.512     0.907     1.98         3.13
## 15          15    11    0.780         0.823     0.947     1.78         2.40
## # ℹ 24 more variables: rim_pct <dbl>, rim_asted <dbl>, other2pt_made <dbl>,
## #   other2pt_attempts <dbl>, other2pt_pct <dbl>, other2pt_asted <dbl>,
## #   fg2_pct <dbl>, fg3_per_g <dbl>, fg3a_per_g <dbl>, fg3_asted <dbl>,
## #   fg3_pct_per_g <dbl>, games <dbl>, ft_per_g <dbl>, fta_per_g <dbl>,
## #   ast_per_g_college <dbl>, orb_per_g <dbl>, drb_per_g <dbl>, stl_per_g <dbl>,
## #   blk_per_g <dbl>, tov_per_g <dbl>, pts_per_g_college <dbl>,
## #   pc_cluster <dbl>, bust <dbl>, good <dbl>
# Build the plotting table in one pass:
#   1. expose the row names of df_cluster as a `name` key,
#   2. attach the columns of groups15 by that key,
#   3. attach the bust / good flags from df_groups by that key,
#   4. move `name` back into the row names,
#   5. store both cluster ids as factors so they are treated as discrete.
df_pc <- df_cluster |>
  rownames_to_column("name") |>
  left_join(groups15, by = "name") |>
  left_join(df_groups |> select(name, bust, good), by = "name") |>
  column_to_rownames("name") |>
  mutate(across(c(pc_cluster, all_cluster), as.factor))

# Rows of df_pc that lie on the convex hull of (PC1, PC2), computed separately
# within each pc_cluster. chull() returns the row indices of the hull vertices
# and slice() keeps exactly those rows, in that order. The result stays
# grouped by pc_cluster.
hulls <- slice(
  group_by(df_pc, pc_cluster),
  chull(PC1, PC2)
)

# Scatter plot of PC1 vs PC2 coloured by pc_cluster, with a translucent convex
# hull drawn around each cluster. Rows with bust == 1 and good == 1 get a
# black marker (shape 10 and shape 5 respectively); those layers are added
# before the coloured points, so the points are drawn on top of them.
p <- ggplot(data = df_pc, aes(x = PC1, y = PC2, color = pc_cluster)) +
  geom_polygon(
    data = hulls,
    aes(group = pc_cluster, color = pc_cluster, fill = pc_cluster),
    alpha = 0.2
  ) +
  geom_point(
    data = df_pc |> filter(bust == 1),
    size = 3, color = "black", shape = 10, show.legend = FALSE
  ) +
  geom_point(
    data = df_pc |> filter(good == 1),
    size = 3, color = "black", shape = 5, show.legend = FALSE
  ) +
  geom_point() +
  # Reverse the y axis once. A second identical call only replaced this scale
  # and made ggplot2 print "Scale for y is already present".
  scale_y_reverse() +
  theme_minimal() +
  labs(
    title = "Clusters with Bad and Good Value players",
    x = "Dimension 1",
    y = "Dimension 2",
    color = "Cluster",
    fill = "Cluster"
  )
p

# Variant of `p` with a repelled text label on every row flagged as bust or
# good. The row names of df_pc are copied into a `name` column so they can be
# mapped to the label aesthetic. Labels are drawn without a fill or border
# (fill = NA, label.size = NA), so only the text and a thin connector show.
p2 <- p +
  geom_label_repel(
    data = filter(
      rownames_to_column(df_pc, var = "name"),
      bust == 1 | good == 1
    ),
    mapping = aes(label = name),
    size = 1.6,
    segment.size = 0.2,
    max.overlaps = 20,
    fill = NA,
    label.size = NA
  )

p2